1 About

This script contains a regression-based analysis of the inference SPR data based on the rERP framework (Smith & Kutas 2015a, 2015b) as adapted and generalized to reading times by Brouwer, Delogu & Crocker (2021) and Aurnhammer et al. (2021).

2 Read data and preprocess

In this section:

Read in data and remove fillers, practice and non-critical regions, extract sentence-final word:

# Read data
df <- read.csv("results_reads.csv", header = TRUE)

# Exclude filler and practice trials
df <- removeFillersPractice(df)

# Extract sentence-final word: the highest word number per item marks the
# final position of each sentence
last.word <- aggregate(WordNum ~ Item, data = df, FUN = max)
names(last.word) <- c("Item", "Final")
df <- merge(df, last.word)
df.final <- filter(df, WordNum == Final)
# Condition means of the final-word RTs (kept for later inspection)
final <- aggregMeans(df.final, as.formula(RT ~ Cond))
# Drop the helper column again so it does not linger in df
df$Final <- NULL

# Exclude non-critical regions (keep precritical through second spillover)
df <- removeRegions(df, keep.regions=-1:2)

Next, the raw data are visualized with histograms and QQ plots before removing outliers.

Histograms and residual plots:

2.1 Remove SPR outliers

# Optional: remove items with low accuracy in B
#df <- filter(df, ! Item %in% c(5,13,25))

# Remove outliers: SPR (word RTs) - SD
# Cutoff: +/- 4 SD around each participant's mean RT (see output below).
# entire.trial = TRUE: presumably the whole trial is discarded when any of
# the kept regions contains an outlier RT -- confirm in the helper's code.
df <- removeOutliersSPR(df,
                        method = "sd",
                        sd.value = 4,
                        entire.trial = TRUE,
                        regions = -1:2)
## Filtering RTs using method: SD by participant (cutoff value: +- 4 SD(s))
## Mean by subject RT ranges from 161 ms to 557 ms.
## Largest deviation from the by subject mean in ms: 772
## Removing outliers values affected 3.37% of the data.
## Data points lost: 216

# Check the range of data points remaining from each subject:
range(xtabs(~ Subject, data=df))
## [1] 144 160
# This shows that between 0 and 16 datapoints were removed per subject. 

# Uncomment to test the impact of excluding the fastest + slowest subject
#df <- filter(df, !Subject %in% c(6, 20))

Histograms and residual plots after removing SPR outliers:

Normality test (Shapiro-Wilk):

# Normality checks on the RTs of the critical regions (Region >= 0).

# Untransformed RTs:
shapiro.test(filter(df, Region >= 0)$RT)
## 
##  Shapiro-Wilk normality test
## 
## data:  filter(df, Region >= 0)$RT
## W = 0.89975, p-value < 2.2e-16

# Log-transformed RTs:
shapiro.test(log(filter(df, Region >= 0)$RT))
## 
##  Shapiro-Wilk normality test
## 
## data:  log(filter(df, Region >= 0)$RT)
## W = 0.99286, p-value = 1.74e-14
# Both tests reject normality, but the log-transformed RTs are much closer
# to normal (W = 0.993 vs. 0.900), motivating logRT as the dependent variable.

2.2 Remove context sentence RT outliers and incorrectly answered trials

# Remove outliers: context sentence reading times
# Context RTs outside [500 ms, 30000 ms] are treated as outliers
# (per the min.rt / max.rt arguments).
df <- removeOutliersContext(df, min.rt=500, max.rt=30000)
## Removing outliers values affected 0.65% of the data.
## Data points lost: 40

# Remove trials with incorrect plausibility ratings
df <- removeIncorrect(df)
## Removing outliers values affected 6.71% of the data.
## Data points lost: 412

# Visualize context sentence reading times with a histogram:
# (filtering on Region == 0 presumably keeps one row per trial, so each
# trial's context RT is counted exactly once -- confirm in the data layout)
par(mfrow=(c(1,2)))
hist(
    filter(df, Region == 0)$ContextRT,
    breaks = 50,
    main = "Untransformed Context Sentence RTs",
    xlab = "Context RTs (ms)",
    col = "steelblue"
    # main = NULL
)
hist(
    log(filter(df, Region == 0)$ContextRT),
    breaks = 50,
    main = "Log-transformed Context Sentence RTs",
    xlab = "Context RTs (log-ms)",
    col = "steelblue"
)

par(mfrow=(c(1,1))) # reset
# NOTE(review): the two vectors printed below are produced by code not shown
# in this chunk -- verify the generating code in the Rmd source.
## [1] 2557 2576 2325 2442
## [1] 254 269 189 210

2.3 Further preprocessing: transformations etc.

In this subsection:

  • Rename variables
  • Transform to factor when necessary
  • Add precritical reading times as predictors
  • Add log-transformed versions of the reading times (the SPR RT (DV) as well as predictors (IVs))
  • Compute interaction terms
  • Standardize the predictors
# Rename variables: strip the "Rating" suffix from the rating columns,
# and give the production-percentage column a shorter name.
rating_colnames <- str_remove(names(df), "Rating")
names(df) <- rating_colnames
df <- rename(df, ProdNorms = ProductionPercentage)

# Transform variables to factors
# (Item/Subject go through numeric so levels are ordered numerically and
# unused levels are dropped)
df$Cond <- as.factor(df$Cond)
df$Item <- as.factor(as.numeric(as.character(df$Item)))
df$Subject <- as.factor(as.numeric(as.character(df$Subject)))

# Create binary predictors for coherence and association
df$BinaryCoherence <- as.factor(ifelse(df$Plausible == "Yes", "coherent", "incoherent"))
df$BinaryAssociation <- as.factor(ifelse(df$Cond %in% c("A", "C"), "associated", "unassociated"))

# Extract precritical RT (Region == -1) and add it to every row of the same
# trial as a predictor column:
df.precrit <- filter(df, Region == -1) %>% 
  dplyr::select(Item, Cond, Subject, RT) %>% 
  rename(RT_precrit = RT)
df <- merge(df, df.precrit, by = c("Item", "Cond", "Subject"))

# Add target length (in characters)
df <- df %>% mutate(Length = nchar(Target))

# Add log-transformed variables (DV + IVs)
df$logRT <- log(df$RT)
df$logRT_precrit <- log(df$RT_precrit)
# BUGFIX: the original `ifelse(df$ProdNorms > 0, log(df$ProdNorms), 0.01)`
# returned the raw constant 0.01 (not a log-scale value) for zero production
# norms, mixing scales within one predictor. The "smoothe for 0" intent is
# implemented here by flooring zeros at 0.01 BEFORE taking the log, so all
# values live on the log scale.
df$logProdNorms <- log(ifelse(df$ProdNorms > 0, df$ProdNorms, 0.01))  # smooth zeros
df$logAssociation <- log(df$Association)
df$logInference <- log(df$Inference)
df$logCoherence <- log(df$Coherence)

# Add interaction variables
# NOTE(review): these products are computed from the RAW predictors and then
# scaled below, whereas the final models use the formula term
# Association:Inference on the already-scaled predictors -- confirm which
# variant downstream code relies on.
df$AssocInference <- df$Association*df$Inference
df$AssocCoherence <- df$Association*df$Coherence

# Order df by Region
df <- arrange(df, Region)

# Scale predictors (careful: needs to be performed AFTER removing data)
df <- scalePredictors(
  df,
  c(
    "ProdNorms",
    "Association",
    "Inference",
    "Coherence",
    "AssocInference",
    "AssocCoherence",
    "RT_precrit",
    "logRT_precrit",
    "Length",
    "logProdNorms",
    "logAssociation",
    "logInference",
    "logCoherence"
  )
)

#str(df)

3 Observed RTs

3.1 Mean RTs by condition for each region of interest

Untransformed RTs
Cond precritical critical spillover1 spillover2
A 284 304 304 297
B 281 292 303 298
C 277 288 290 279
D 277 293 292 289
Log-transformed RTs
Cond precritical critical spillover1 spillover2
A 5.606 5.658 5.67 5.653
B 5.602 5.626 5.669 5.656
C 5.579 5.61 5.622 5.591
D 5.582 5.623 5.633 5.618

3.2 Plots

3.2.1 RTs by region and condition

3.2.2 Density plot: spread in each region

Check the spread of the variation of the reading times in the critical regions.

A larger range implies that the residuals will be larger, due to the increased amount of variation in that region.

3.2.3 Task effect: decreasing RTs over time

Check the presence of a task effect, i.e., decreasing RTs over the course of the experiment.

Only critical region (target noun) RTs are plotted.

(Note that across items, reversed list orders were used to counterbalance trial position effects.)

Method 1: RT ~ Block

Method 2: RT ~ TrialNumber

4 Traditional analysis

Run a separate lmer model for each region with contrast-coded categorical predictor values and observe p-values.

(The precritical region is not modeled, since precritical RT is used as a predictor.)

# Set contrasts
# Sum (deviation) coding: levels are coded +1 / -1, so each main effect is
# evaluated at the grand mean and the intercept is the mean of cell means.
#contrasts(df$Cond) <- car::contr.Sum(levels(df$Cond))
#contrasts(df$Cond)
contrasts(df$BinaryCoherence) <- contr.sum(2)
contrasts(df$BinaryAssociation) <- contr.sum(2)

# Verify the coding (first factor level is +1):
contrasts(df$BinaryCoherence)
##            [,1]
## coherent      1
## incoherent   -1
contrasts(df$BinaryAssociation)
##              [,1]
## associated      1
## unassociated   -1

# Split data into different data frames for each region 
crit <- filter(df, Region==0)
spill1 <- filter(df, Region==1)
spill2 <- filter(df, Region==2)

# Shared model formula for all three regions: Coherence x Association
# interaction + log precritical RT covariate, random intercepts for
# Subject and Item.
#f <- as.formula(logRT ~ Cond + logRT_precrit + (1|Subject) + (1|Item))
f <- as.formula(logRT ~ BinaryCoherence*BinaryAssociation + logRT_precrit + (1|Subject) + (1|Item))
# With random slopes but no correlations:
#f <- as.formula(logRT ~ BinaryCoherence*BinaryAssociation + logRT_precrit + (1+logRT_precrit || Subject) + (1+logRT_precrit || Item))

#######################
### CRITICAL REGION ###
#######################
# Disable scientific notation so p-values print as plain decimals
options(scipen = 999) 
summary(lmer(f, data=crit))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: f
##    Data: crit
## 
## REML criterion at convergence: -723.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.0084 -0.5273 -0.0718  0.4090  5.7841 
## 
## Random effects:
##  Groups   Name        Variance   Std.Dev.
##  Subject  (Intercept) 0.02821375 0.167969
##  Item     (Intercept) 0.00007792 0.008827
##  Residual             0.03119573 0.176623
## Number of obs: 1433, groups:  Subject, 40; Item, 40
## 
## Fixed effects:
##                                        Estimate  Std. Error          df t value
## (Intercept)                            5.625484    0.027005   35.731134 208.313
## BinaryCoherence1                       0.005462    0.004686 1358.726063   1.166
## BinaryAssociation1                     0.003753    0.004680 1360.247478   0.802
## logRT_precrit                          0.120666    0.007479 1401.794393  16.135
## BinaryCoherence1:BinaryAssociation1    0.011869    0.004678 1353.387884   2.537
##                                                Pr(>|t|)    
## (Intercept)                         <0.0000000000000002 ***
## BinaryCoherence1                                 0.2439    
## BinaryAssociation1                               0.4227    
## logRT_precrit                       <0.0000000000000002 ***
## BinaryCoherence1:BinaryAssociation1              0.0113 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) BnryC1 BnryA1 lgRT_p
## BinryChrnc1  0.002                     
## BnryAssctn1 -0.006 -0.026              
## logRT_prcrt  0.004 -0.053  0.007       
## BnryCh1:BA1 -0.004 -0.034  0.011 -0.019
# Critical region: only the Coherence x Association interaction reaches
# significance (p = .0113); precritical logRT is a strong covariate.

##############################
### FIRST SPILLOVER REGION ### 
##############################

summary(lmer(f, data=spill1))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: f
##    Data: spill1
## 
## REML criterion at convergence: -700.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.5310 -0.5497 -0.0902  0.4472  5.3649 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev.
##  Subject  (Intercept) 0.0230734 0.15190 
##  Item     (Intercept) 0.0001631 0.01277 
##  Residual             0.0318185 0.17838 
## Number of obs: 1433, groups:  Subject, 40; Item, 40
## 
## Fixed effects:
##                                        Estimate  Std. Error          df t value
## (Intercept)                            5.644631    0.024563   35.298231 229.803
## BinaryCoherence1                       0.014238    0.004733 1356.127057   3.008
## BinaryAssociation1                    -0.003710    0.004728 1357.551552  -0.785
## logRT_precrit                          0.113475    0.007518 1368.418211  15.093
## BinaryCoherence1:BinaryAssociation1    0.003477    0.004725 1350.842492   0.736
##                                                 Pr(>|t|)    
## (Intercept)                         < 0.0000000000000002 ***
## BinaryCoherence1                                 0.00268 ** 
## BinaryAssociation1                               0.43271    
## logRT_precrit                       < 0.0000000000000002 ***
## BinaryCoherence1:BinaryAssociation1              0.46188    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) BnryC1 BnryA1 lgRT_p
## BinryChrnc1  0.002                     
## BnryAssctn1 -0.007 -0.026              
## logRT_prcrt  0.005 -0.052  0.007       
## BnryCh1:BA1 -0.005 -0.034  0.011 -0.019
# First spillover: a significant Coherence main effect emerges (p = .00268);
# the interaction seen in the critical region is no longer significant.

###############################
### SECOND SPILLOVER REGION ### 
###############################

summary(lmer(f, data=spill2))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: f
##    Data: spill2
## 
## REML criterion at convergence: -676.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.2242 -0.5552 -0.0595  0.4489  4.8636 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Subject  (Intercept) 0.016697 0.12921 
##  Item     (Intercept) 0.001049 0.03239 
##  Residual             0.032110 0.17919 
## Number of obs: 1433, groups:  Subject, 40; Item, 40
## 
## Fixed effects:
##                                        Estimate  Std. Error          df t value
## (Intercept)                            5.626019    0.021594   37.443060 260.531
## BinaryCoherence1                       0.018852    0.004760 1351.548656   3.961
## BinaryAssociation1                    -0.008865    0.004755 1352.135805  -1.864
## logRT_precrit                          0.115335    0.007514 1264.588472  15.348
## BinaryCoherence1:BinaryAssociation1    0.006526    0.004749 1348.202081   1.374
##                                                 Pr(>|t|)    
## (Intercept)                         < 0.0000000000000002 ***
## BinaryCoherence1                               0.0000786 ***
## BinaryAssociation1                                0.0625 .  
## logRT_precrit                       < 0.0000000000000002 ***
## BinaryCoherence1:BinaryAssociation1               0.1696    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) BnryC1 BnryA1 lgRT_p
## BinryChrnc1  0.002                     
## BnryAssctn1 -0.008 -0.027              
## logRT_prcrt  0.006 -0.052  0.007       
## BnryCh1:BA1 -0.006 -0.035  0.012 -0.019
# Second spillover: Coherence is highly significant (p < .0001); Association
# is marginal (p = .0625); no significant interaction.
# Restore the default scientific-notation behavior:
options(scipen = 0)

5 rRT analysis

In the following analyses, each observed RT is replaced with an estimated RT obtained from a linear mixed effects model. Beyond analyzing the estimated RTs, this approach allows us to examine how the estimates change dynamically as individual predictors are either a) incrementally added to the model (forward model construction), or b) controlled for by setting them to their grand mean value across conditions, which lets us inspect the contribution of individual predictors to goodness of fit on a by-condition and by-region basis.

In the first part of the analysis, forward model construction is used to investigate the impact of individual predictors and perform model selection. This approach allows to contrast untransformed with log-transformed versions of the same predictor, and to compare the collinear predictors Inference, Coherence and their interactions.

In the second part of the analysis, a technique based on neutralizing predictors in the full model is then used to gain insight into how the individual predictors – corresponding to specific properties of the stimulus presented at trial \(i\) – combine at a latent level in each trial to form the resulting RT.

Concretely, the signal for a specific stimulus at each trial \(i\) is broken down into a weighted sum of the individual predictors, plus the noise term \(\epsilon_i\):

\[ \begin{align} y_i &= \beta_0 + \beta_1 x_{1i} + \beta_2 x_{2i} + ... + \epsilon_i \\ & = \beta_0 + \sum_{j = 1}^N \beta_j x_{ji} + \epsilon_i \end{align} \]

where

The goal of the estimation process is to find the set of most optimal \(\hat{\beta}_j\)’s that minimizes \(\epsilon_i\) across trials, given the stimulus properties \(x_{ji}\).

The impact of each predictor in each region of interest can then be analyzed by inspecting the change in the residual error ensuing when the predictor is controlled for (i.e., when its coefficient is set to its mean value – which in the present analysis corresponds to setting it to zero, given that predictors were standardized to have a mean of \(0\) and a standard deviation of \(1\)). More precisely, an increase in residual error when controlling for a specific predictor in a model (and correspondingly a larger deviation of the estimated RTs from the observed RTs) indicates that this predictor is critical for reducing the error in that region. This provides a quantitative measure of goodness of model fit that allows to compare different models in which the impact of the individual predictors can be isolated (where closer approximation of \(\epsilon\) to \(0\) means better fit).

Finally, the effect structure of the observed RTs and that of the estimated rRTs can be analyzed and compared using statistical analysis:

General points:

5.1 Define parameters

Define general parameters:

  • regions of interest to model
  • untransformed or log-transformed RTs
  • based on RT unit (ms or log ms): ranges for the plot y-axis
  • which kind of models to run:
    • “lm”: simple LM
    • “lm.by.subj”: by-subject LMs
    • “lmer”: LMER
# Regions of interest to model (precritical through second spillover)
regions <- -1:2
# Model log-transformed RTs (TRUE) or raw RTs in ms (FALSE)?
log.RTs <- TRUE
# Model type: "lm" (simple LM), "lm.by.subj" (by-subject LMs), "lmer" (LMER).
# (Fixed: was assigned with `=`; use `<-` consistently for assignment.)
m.type <- "lmer"

# Derive the DV name, the precritical-RT predictor name, and the plot y-axis
# settings from the chosen RT unit.
if (log.RTs) {  # `== TRUE` was redundant on a logical scalar
  DV <- "logRT"
  precrit <- "logRT_precrit"
  y.unit <- "Log RT"
  y.range <- c(5.52, 5.72)
  y.range.res <- c(-0.07, 0.07)
} else {
  DV <- "RT"
  precrit <- "RT_precrit"
  y.unit <- "RT (ms)"
  y.range <- c(260, 320)
  y.range.res <- c(-20, 20)
}

# Random-effects string appended to model formulas; only LMER models take
# random effects, the LM variants get an empty suffix.
if (m.type == "lmer") {
  rand.ef <- " + (1|Subject) + (1|Item)"
} else {
  rand.ef <- ""
}

For the final analyses, lmer models with log-transformed RTs as the DV were used.

5.2 Forward model construction

This section aims to assess the fit of individual predictors in isolation, starting from a simple baseline model containing only the intercept.

Aims:

  1. provide preliminary insights into the variance in the signal, and the impact of each predictor in isolation; then gradually build models by adding predictors.
  2. determine the impact of log-transformations of the predictors for the following predictors: Production Norms, Association, Inference, Coherence (and precriticalRT?)

Method:

  1. begin with a baseline (intercept only) model, then add the individual predictors of interest one by one
  2. compare residual plots for different versions of the same predictor (e.g. untransformed Association vs. log-transformed Association), using a subset of the experimental conditions for which there is variation in this predictor while variation is minimized among the other predictors

5.2.1 A) From baseline to full model

Following the logic of Brouwer et al. (2020), the analysis starts with a baseline model of the variance in the signal. The baseline model assumes that no factors were manipulated and that all trials belong to the same condition. Therefore, the resulting estimate for each trial in a given region will simply be the average over all trials in that region, with slight offsets due to the random effects. Building on this baseline model, predictors are then added in isolation until we arrive at the full model including an interaction term.

Note: In this section, I used lmer models with random intercepts for Subject and Item, but no random slopes, as the latter consistently led to convergence problems.

Next, the logRTs on the precritical region are included as a predictor in the baseline model to account for the baseline offset observed in the precritical region.

The pre-critical region will therefore no longer be modeled and plotted in the remainder of this section, since its RTs are used as a predictor, always leading to perfect fit in the precritical region.

For the other regions, the residual plot shows that including precritical RT alone as a predictor does not notably improve model fit:

In a next step, Association is added as a further predictor to the model, again not leading to drastic improvements in fit:

The next two plots add Coherence and Inference individually to the baseline model (Intercept + precritical RT), both leading to similar improvements in model fit:

We can also add both Inference and Coherence to the baseline model (on the assumption that collinearity does not pose a problem to linear regression per se, without performing inferential statistics):

So far, we have focused on assessing the influence of individual predictors in isolation (as well as the correlated Inference+Coherence).

In the next step, we can add Association to the previously constructed model (still following Smith & Kutas 2015 in keeping collinear predictors despite the potential for variance inflation):

Finally, we can also add the hypothesized interaction term for Association:Inference to the model (note that the interaction term is computed from the scaled predictors, instead of being computed individually from the raw predictors and then scaled as a product, since the latter approach leads to huge inflation of variance in the model coefficients):

Same model with more complex random effects structure (removing correlations, but already leads to singular fits):

## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')

This shows an interesting pattern in the coefficient and effect size plots: while Inference is determined by the model as having a positive-going and significant effect in the first spillover region and Coherence is not, this is reversed in the post-spillover region where Coherence is predicted to have a positive, significant effect while the effect of Inference even becomes numerically negative from the model’s point of view.

This then demonstrates how, when given collinear predictors, the model can exploit which predictor best maximizes fit in each region, and place positive or negative weights accordingly, irrespective of the correlation between the predictors.

This phenomenon however also makes the models more prone to overfit to noise in the data.

As a sanity check, we can also compute a model in which we remove the collinear predictor Coherence, to check if the effect of Inference then reverses in the post-spillover region:

As speculated, the reversal of the sign for Inference is exactly what is observed in the post-critical region, with a significant, positive effect for this predictor.

As another sanity check, we can also check if it helps to include TrialNumber as a predictor. However, as trial order of items and conditions was counterbalanced across lists, this should not lead to improvements in model fit, even though it will be a highly significant predictor.

5.2.2 B) Modeling subsets of the data

We now turn to assessing the impact of individual predictors and transformations thereof in subsets of the full data. Each untransformed-logtransformed pair of predictors is analyzed in the subset of conditions where the pair shows maximal variation, while the other predictors remain constant. For each pair, a minimal lmer model is constructed containing only the Intercept + the predictor, as well as random intercepts for Subject+Item.

These data subsets were created:

  • a subset containing conditions A & B is used to model the impact of the Production Norms
  • a subset containing conditions C & D is used to model Association
  • a subset containing conditions A & C is used to model the impact of Inference/Coherence (TODO: alternatively, use ABC data frame? => see last 3 plots)
  • precritical RT is also tested on the A & C subset, mirroring the coherent vs. incoherent divide observed in the precritical region

The following plots show the model residuals with the untransformed variable on the left and the log-transformed variable on the right, respectively:

## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')

## boundary (singular) fit: see help('isSingular')

Observations:

  • Log-transformation of the predictor variable does not lead to improved fit, except very slightly for precritical RT in regions 0-2 (as predicted). Therefore, the original (scaled) versions of the predictors will be kept, except for precritical RT.
  • Subset conditions A + B:
    • The production norms can model the difference between A and B well except in the critical region
  • Subset conditions A + C:
    • Inference can model the difference between A and C well except in the post-spillover region
    • Coherence can model the difference between A and C rather well in all regions
    • Precritical region RT alone cannot model the difference between A and C well (except in the precritical region itself)
  • Subset conditions C + D:
    • Association can model the difference between C and D very well in the post-spillover region

5.3 Neutralizing predictors

5.3.1 Full model + coefficients and effect size

We start from the full chosen model:

5.3.2 Controlling individual predictors

Model formula for the full model:

logRT ~ logRT_precrit + Association + Coherence + Inference + Association:Inference

# Build the full-model formula: precritical logRT + the three rating
# predictors + the Association:Inference interaction, plus random intercepts.
m.formula <- makeFormula(DV,
                         IVs = "1 + logRT_precrit + Coherence + Association +
                         Inference + Association:Inference",
                         rand.ef = rand.ef)

# Specify which random effects to include when generating model predictions
rand.ef.cols <- c("RanEfSubject", "RanEfItem")

# Fit one model per region of interest
m <- runModels(df,
               m.type,
               m.formula,
               regions)
## Chosen model: LMER

# Specify full model IVs:
model.ivs <- c(precrit, "Association", "Inference", "Coherence",
               "Association:Inference")

# Readjust the residuals y-axis (make larger, since larger residuals arise)
y.range.res <- c(-0.11, 0.11)

# Neutralizing a predictor sets its contribution to its mean -- here zero,
# since predictors were standardized (see section intro above).

# Sanity check 1: remove ALL predictors (yields intercept model)
neutralizePredictors(m,
                     neutralize=model.ivs,
                     model.ivs,
                     rand.ef.cols = rand.ef.cols)
## Correcting for the Association:Inference mean...
## Including random effect: RanEfSubject
## Including random effect: RanEfItem


# Sanity check 2: remove all predictors except precritical RT
neutralizePredictors(m,
                     neutralize=model.ivs[! model.ivs == precrit],
                     model.ivs,
                     rand.ef.cols = rand.ef.cols)
## Correcting for the Association:Inference mean...
## Including random effect: RanEfSubject
## Including random effect: RanEfItem



### Neutralizing individual predictors
# (one call per predictor; all other predictors stay in the model)
neutralizePredictors(m, neutralize=c("Association"), model.ivs,
                     rand.ef.cols = rand.ef.cols)
## Including random effect: RanEfSubject
## Including random effect: RanEfItem

neutralizePredictors(m, neutralize=c("Coherence"), model.ivs,
                     rand.ef.cols = rand.ef.cols)
## Including random effect: RanEfSubject
## Including random effect: RanEfItem

neutralizePredictors(m, neutralize=c("Inference"), model.ivs,
                     rand.ef.cols = rand.ef.cols)
## Including random effect: RanEfSubject
## Including random effect: RanEfItem

neutralizePredictors(m, neutralize=c("Association:Inference"), model.ivs,
                     rand.ef.cols = rand.ef.cols)
## Correcting for the Association:Inference mean...
## Including random effect: RanEfSubject
## Including random effect: RanEfItem

neutralizePredictors(m, neutralize=precrit, model.ivs,
                     rand.ef.cols = rand.ef.cols)
## Including random effect: RanEfSubject
## Including random effect: RanEfItem


### Isolating predictors (ie. neutralizing everything else except intercept)
# Each call keeps only the named predictor(s) by neutralizing all other
# entries of model.ivs.

# Keep only Association
neutralizePredictors(m,
                     neutralize=model.ivs[! model.ivs == "Association"],
                     model.ivs,
                     rand.ef.cols = rand.ef.cols)
## Correcting for the Association:Inference mean...
## Including random effect: RanEfSubject
## Including random effect: RanEfItem


# Keep only Inference
neutralizePredictors(m,
                     neutralize=model.ivs[! model.ivs == "Inference"],
                     model.ivs,
                     rand.ef.cols = rand.ef.cols)
## Correcting for the Association:Inference mean...
## Including random effect: RanEfSubject
## Including random effect: RanEfItem


# Keep only Coherence
neutralizePredictors(m,
                     neutralize=model.ivs[! model.ivs == "Coherence"],
                     model.ivs,
                     rand.ef.cols = rand.ef.cols)
## Correcting for the Association:Inference mean...
## Including random effect: RanEfSubject
## Including random effect: RanEfItem


# Keep only Interaction
neutralizePredictors(m,
                     neutralize=model.ivs[! model.ivs ==
                                            "Association:Inference"],
                     model.ivs,
                     rand.ef.cols = rand.ef.cols)
## Including random effect: RanEfSubject
## Including random effect: RanEfItem


### Isolate individual predictors, but also keep precriticalRT as predictor besides the intercept:

# Keep only Association+precrit
neutralizePredictors(m,
                     neutralize=model.ivs[! model.ivs %in% c(precrit, "Association")],
                     model.ivs,
                     rand.ef.cols = rand.ef.cols)
## Correcting for the Association:Inference mean...
## Including random effect: RanEfSubject
## Including random effect: RanEfItem


# Keep only Inference+precrit
neutralizePredictors(m,
                     neutralize=model.ivs[! model.ivs %in% c(precrit, "Inference")],
                     model.ivs,
                     rand.ef.cols = rand.ef.cols)
## Correcting for the Association:Inference mean...
## Including random effect: RanEfSubject
## Including random effect: RanEfItem


# Keep only Coherence+precrit
neutralizePredictors(m,
                     neutralize=model.ivs[! model.ivs %in% c(precrit, "Coherence")],
                     model.ivs,
                     rand.ef.cols = rand.ef.cols)
## Correcting for the Association:Inference mean...
## Including random effect: RanEfSubject
## Including random effect: RanEfItem



### Isolating pairs of variables 
# Keep only Inference + Coherence
neutralizePredictors(m,
                     neutralize=model.ivs[! model.ivs %in%
                                           c("Inference", "Coherence")],
                     model.ivs,
                     rand.ef.cols = rand.ef.cols)
## Correcting for the Association:Inference mean...
## Including random effect: RanEfSubject
## Including random effect: RanEfItem


# Keep Association + Coherence
neutralizePredictors(m,
                     neutralize=model.ivs[! model.ivs %in%
                                           c("Association", "Coherence")],
                     model.ivs,
                     rand.ef.cols = rand.ef.cols)
## Correcting for the Association:Inference mean...
## Including random effect: RanEfSubject
## Including random effect: RanEfItem


# Keep Association, Inference + Coherence
neutralizePredictors(m,
                     neutralize=model.ivs[! model.ivs %in%
                                           c("Association", "Inference", "Coherence")],
                     model.ivs,
                     rand.ef.cols = rand.ef.cols)
## Correcting for the Association:Inference mean...
## Including random effect: RanEfSubject
## Including random effect: RanEfItem

5.3.3 Comparing by-subject linear model, lmer with / without random intercepts for items

# Compare three model classes on the same fixed-effects structure:
# by-subject LMs, LMER with subject intercepts only, and LMER with
# subject + item intercepts.

# Linear model by-subjects:
f1 <- makeFormula(DV, IVs = "1 + logRT_precrit + Coherence + Association*Inference",
                         rand.ef = "")
m1 <- runModels(df, "lm.by.subj", f1, regions)
## Chosen model: by-subject LMs

# LMER without by-item random intercepts:
f2 <- makeFormula(DV, IVs = "1 + logRT_precrit + Coherence + Association*Inference",
                         rand.ef = " + (1|Subject)")
m2 <- runModels(df, "lmer", f2, regions)
## Chosen model: LMER

# LMER with by-item random intercepts:
f3 <- makeFormula(DV, IVs = "1 + logRT_precrit + Coherence + Association*Inference",
                         rand.ef = " + (1|Subject) + (1|Item)")
m3 <- runModels(df, "lmer", f3, regions)
## Chosen model: LMER

# Shared y-axis range so the three residual panels are directly comparable
y.range.res.modelcomp <- c(-0.04, 0.04)
p1 <- plotSPR(m1, "Residuals", y.unit, y.range.res.modelcomp) +
  ggtitle("By-subject linear model")
p2 <- plotSPR(m2, "Residuals", y.unit, y.range.res.modelcomp) +
  ggtitle("LMER: subject intercepts")
p3 <- plotSPR(m3, "Residuals", y.unit, y.range.res.modelcomp) +
  ggtitle("LMER: subject + item intercepts")
# Arrange the three panels side by side and save to disk.
# (Removed a dangling `#,` comment and a redundant single-argument paste0().)
p <- grid.arrange(p1, p2, p3, nrow = 1)
ggsave("./plots/residual_comparison_item_random_intercepts.pdf", p,
       width = 10, height = 5, dpi = 300)